fileNam <- "/Users/immbio/Desktop/HumanHeartCarTrans2/data/Human_heart_ExpDH.rds"
seuratExpDH <- readRDS(fileNam)
seuratExpDH$clusterName <- "clusterName"
seuratExpDH$clusterName[which(seuratExpDH$RNA_snn_res.0.4 %in% "0" )] <- "Fb1"
seuratExpDH$clusterName[which(seuratExpDH$RNA_snn_res.0.4 %in% "1" )] <- "PerivFb1"
seuratExpDH$clusterName[which(seuratExpDH$RNA_snn_res.0.4 %in% "2" )] <- "Mph2"
seuratExpDH$clusterName[which(seuratExpDH$RNA_snn_res.0.4 %in% "3" )] <- "BEC1"
seuratExpDH$clusterName[which(seuratExpDH$RNA_snn_res.0.4 %in% "4" )] <- "Fb2"
seuratExpDH$clusterName[which(seuratExpDH$RNA_snn_res.0.4 %in% "5" )] <- "CM"
seuratExpDH$clusterName[which(seuratExpDH$RNA_snn_res.0.4 %in% "6" )] <- "Tcell1"
seuratExpDH$clusterName[which(seuratExpDH$RNA_snn_res.0.4 %in% "7" )] <- "BEC2"
seuratExpDH$clusterName[which(seuratExpDH$RNA_snn_res.0.4 %in% "8" )] <- "VSMC"
seuratExpDH$clusterName[which(seuratExpDH$RNA_snn_res.0.4 %in% "9" )] <- "Mph1"
seuratExpDH$clusterName[which(seuratExpDH$RNA_snn_res.0.4 %in% "10" )] <- "BEC3"
seuratExpDH$clusterName[which(seuratExpDH$RNA_snn_res.0.4 %in% "11" )] <- "NC"
seuratExpDH$clusterName[which(seuratExpDH$RNA_snn_res.0.4 %in% "12" )] <- "BaroRec"
seuratExpDH$clusterName[which(seuratExpDH$RNA_snn_res.0.4 %in% "13" )] <- "Bcell"
seuratExpDH$clusterName[which(seuratExpDH$RNA_snn_res.0.4 %in% "14" )] <- "Fb3"
seuratExpDH$clusterName[which(seuratExpDH$RNA_snn_res.0.4 %in% "15" )] <- "Tcell2"
seuratExpDH$clusterName[which(seuratExpDH$RNA_snn_res.0.4 %in% "16" )] <- "LEC"
seuratExpDH$clusterName[which(seuratExpDH$RNA_snn_res.0.4 %in% "17" )] <- "PerivFb2"
seuratExpDH$clusterName[which(seuratExpDH$RNA_snn_res.0.4 %in% "18" )] <- "Adipoc"
colclusterName <- c("#67001f", "#f4a582","#D53E4F", "#B45B5C","#003c30","#01665e","#66C2A5", "#BEAEF8","#BEAED4", "#c7eae5", "#B09C85", "#4e5a4c","#393A3F","pink","#4588CA","#3299CA","#FCC80B","#FEE60B","#628395")
names(colclusterName) <- c("CM","Fb1","Fb2","Fb3","PerivFb1","PerivFb2","VSMC","BEC1","BEC2","BEC3","LEC","NC","BaroRec","Adipoc","Mph1","Mph2","Tcell1","Tcell2","Bcell")
coldiseaseCond <- c("#dfc27d","#BE3144","#f4a582","#B45B5C","#8c510a","#202547","#355C7D","#779d8d", "#01665e", "#3288BD", "#BEAED4")
names(coldiseaseCond) <- c("donorheart", "explant", "visit1", "visit2", "visit3", "visit4", "visit5", "visitX1", "visitX2", "visitX3", "visitX4")
# Combine slots
seuratExpDH$patient_diseaseCond <- paste0(seuratExpDH$patient, '_', seuratExpDH$diseaseCond)
#table(seuratExpDH$patient_diseaseCond)
seuratExpDH$patient_clusterName <- paste0(seuratExpDH$patient, '_', seuratExpDH$clusterName)
#table(seuratExpDH$patient_clusterName)
seuratExpDH$diseaseCond_clusterName <- paste0(seuratExpDH$diseaseCond, '_', seuratExpDH$clusterName)
table(seuratExpDH$diseaseCond_clusterName)
##
## donorheart_Adipoc donorheart_BaroRec donorheart_Bcell donorheart_BEC1 donorheart_BEC2
## 78 270 88 5834 1743
## donorheart_BEC3 donorheart_CM donorheart_Fb1 donorheart_Fb2 donorheart_Fb3
## 963 6129 8213 912 296
## donorheart_LEC donorheart_Mph1 donorheart_Mph2 donorheart_NC donorheart_PerivFb1
## 97 462 4097 579 8851
## donorheart_PerivFb2 donorheart_Tcell1 donorheart_Tcell2 donorheart_VSMC explant_Adipoc
## 273 550 70 1118 665
## explant_BaroRec explant_Bcell explant_BEC1 explant_BEC2 explant_BEC3
## 1874 927 15142 4267 1297
## explant_CM explant_Fb1 explant_Fb2 explant_Fb3 explant_LEC
## 5984 20784 16498 1515 899
## explant_Mph1 explant_Mph2 explant_NC explant_PerivFb1 explant_PerivFb2
## 2323 11455 1432 14914 629
## explant_Tcell1 explant_Tcell2 explant_VSMC
## 4203 703 2425
# Set factor levels and identities
seuratExpDH$clusterName <- factor(seuratExpDH$clusterName, levels = c("CM","Fb1","Fb2","Fb3","PerivFb1","PerivFb2","VSMC","BEC1","BEC2","BEC3","LEC","NC","BaroRec","Adipoc","Mph1","Mph2","Tcell1","Tcell2","Bcell"))
Idents(seuratExpDH) <- seuratExpDH$clusterName
genes <- data.frame(gene=rownames(seuratExpDH)) %>%
mutate(geneID=sub("^.*\\.", "", gene))
# Create selGenes with geneID vector, join, and filter
selGenes <- data.frame(geneID = rev(c("TTN", "MYBPC3", "RYR2", "NEBL", "TNNT2", "CMYA5", "COL6A3", "DCN", "FBN1", "C7", "PDGFRA", "CDH19", "PDGFRB","ITGA7","RGS5", "NOTCH3", "MYH11", "ACTA2","PECAM1", "VWF", "EGFL7", "POSTN", "ITGA10", "CDH11","CCL21", "PROX1", "FLT4", "NRXN1", "ANK3", "PTPRZ1", "ASIC2", "PIEZO2", "MTHFD1L", "ACACB", "PLIN1", "GPAM", "CD163", "MRC1", "SIGLEC1", "STAB1", "CSF1R", "MERTK", "IL7R", "PTPRC", "CD2", "KIT", "CPA3", "IL18R1", "MS4A1", "PAX5", "FCRL5","IGKC"))) %>%
left_join(., genes, by = "geneID") %>%
filter(gene != "ENSG00000232995.RGS5")
# Run dot plot
p <-dittoSeq::dittoDotPlot(seuratExpDH, vars = selGenes$gene, group.by = "clusterName") + RotatedAxis() + coord_flip() + scale_color_viridis(option= "F")
#Differential analysis using muscat https://github.com/HelenaLC/muscat
#Preparation: Set as ScExperiment
sceExpDH <- as.SingleCellExperiment(seuratExpDH)
#Compute sum factors and normalize
sceExpDH <- computeLibraryFactors(sceExpDH)
sceExpDH <- logNormCounts(sceExpDH)
# Step 1: Add metadata to colData
colData(sceExpDH)$sample_id <- sceExpDH$Sample
colData(sceExpDH)$cluster_id <- sceExpDH$clusterName
colData(sceExpDH)$group_id <- sceExpDH$diseaseCond
# Step 2: Standardize structure
sceExpDH <- prepSCE(sceExpDH,
kid = "cluster_id",
gid = "group_id",
sid = "sample_id")
# Step 3: Pseudobulk aggregation
pb <- aggregateData(sceExpDH,
assay = "counts", fun = "sum",
by = c("cluster_id", "sample_id"))
# run pseudobulk (aggregation-based) DS analysis (analyze differential gene expression in specific cell subpopulations or states, comparing conditions)
ds_pb <- pbDS(pb, method = "edgeR")
## | | | 0% | |===== | 5% | |========= | 11% | |============== | 16% | |=================== | 21% | |======================== | 26% | |============================ | 32% | |================================= | 37% | |====================================== | 42% | |=========================================== | 47% | |=============================================== | 53% | |==================================================== | 58% | |========================================================= | 63% | |============================================================== | 68% | |================================================================== | 74% | |======================================================================= | 79% | |============================================================================ | 84% | |================================================================================= | 89% | |===================================================================================== | 95% | |==========================================================================================| 100%
#print(ds_pb)
#run pseudobulk multidimensional scaling (explore and visualize global patterns of expression similarity/differences across samples or groups)
pb_mds <- pbMDS(pb)
# use very distinctive shaping of groups & change cluster colors
#pb_mds <- pb_mds +
# scale_shape_manual(values = c(17, 4)) +
# scale_color_manual(values = RColorBrewer::brewer.pal(8, "Set2"))
# change point size & alpha
#pb_mds$layers[[1]]$aes_params$size <- 5
#pb_mds$layers[[1]]$aes_params$alpha <- 0.6
print(pb_mds)
#GSEA with GO for Fibroblast Clusters
markerGenesExpDH <- read.delim("~/Desktop/HumanHeartCarTrans2/analysis/ExpDHmarkerGenesRNA_snn_res.0.4", header = TRUE, sep = "\t")
## adjust table
markerG <- markerGenesExpDH %>%
filter(avg_log2FC > 0.5) %>%
filter(cluster %in% c(0, 4, 14)) %>% # Fb1 = Cluster 0, Fb2 = Cluster 4, Fb 3 = Cluster 14
mutate(Gene = sub("^.*\\.", "", gene),
EnsID = sub("\\..*", "", gene))
## GSEA
ego <- enrichGO(gene = unique(markerG$EnsID),
OrgDb = "org.Hs.eg.db",
keyType = 'ENSEMBL',
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = 0.05,
qvalueCutoff = 0.05)
ego <- setReadable(ego, OrgDb = org.Hs.eg.db)
dotplot(ego, showCategory=30)
# Convert to a numeric column for plotting
convert_gene_ratio <- function(df) {
df %>% mutate(GeneRatioNum = sapply(GeneRatio, function(x) {
parts <- strsplit(x, "/")[[1]]
as.numeric(parts[1]) / as.numeric(parts[2])
}))
}
split_results <- lapply(split_results, convert_gene_ratio)
dotplot_list <- lapply(names(split_results), function(cluster_name) {
df <- split_results[[cluster_name]]
# limit to top N GO terms
df <- df %>% arrange(p.adjust) %>% head(30)
ggplot(df, aes(x = GeneRatioNum, y = reorder(Description, GeneRatioNum))) +
geom_point(aes(size = Count, color = p.adjust)) +
scale_color_continuous(low = "red", high = "blue", name = "Adj. p-value") +
scale_size_continuous(name = "Gene Count") +
labs(title = paste("GO Enrichment for Cluster", cluster_name),
x = "Gene Ratio",
y = "GO Term") +
theme_bw(base_size = 10) +
theme(legend.position = "right")
})
# Name the plots list
names(dotplot_list) <- names(split_results)
#Display plots
for (p in dotplot_list) {
print(p)
}
# GSEA for top 500 genes divided by condition ## by all clusters
# Convert to a numeric column for plotting
convert_gene_ratio_con <- function(df) {
df %>% mutate(GeneRatioNum = sapply(GeneRatio, function(x) {
parts <- strsplit(x, "/")[[1]]
as.numeric(parts[1]) / as.numeric(parts[2])
}))
}
split_results_con <- lapply(split_results_con, convert_gene_ratio)
dotplot_list_con <- lapply(names(split_results_con), function(cluster_name) {
df <- split_results_con[[cluster_name]]
# limit to top N GO terms
df <- df %>% arrange(p.adjust) %>% head(30)
ggplot(df, aes(x = GeneRatioNum, y = reorder(Description, GeneRatioNum))) +
geom_point(aes(size = Count, color = p.adjust)) +
scale_color_continuous(low = "red", high = "blue", name = "Adj. p-value") +
scale_size_continuous(name = "Gene Count") +
labs(title = paste("GO Enrichment for Cluster", cluster_name),
x = "Gene Ratio",
y = "GO Term") +
theme_bw(base_size = 10) +
theme(legend.position = "right")
})
# Name the plots list
names(dotplot_list_con) <- names(split_results_con)
# save dotplots as pdf in a loop
for (name in names(dotplot_list_con)) {
ggsave(filename = paste0("GO_dotplot_", name, ".pdf"),
plot = dotplot_list_con[[name]],
width = 9,
height = 8)
}
## session info
``` r
date()
## [1] "Thu Oct 9 15:36:08 2025"
sessionInfo()
## R version 4.5.1 (2025-06-13)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sequoia 15.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: Europe/Zurich
## tzcode source: internal
##
## attached base packages:
## [1] grid stats4 stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] NCmisc_1.2.0 VennDiagram_1.7.3 futile.logger_1.4.3
## [4] ggupset_0.4.1 gridExtra_2.3 DOSE_4.2.0
## [7] enrichplot_1.28.4 msigdbr_25.1.1 org.Hs.eg.db_3.21.0
## [10] AnnotationDbi_1.70.0 clusterProfiler_4.16.0 multtest_2.64.0
## [13] metap_1.12 scater_1.35.0 scuttle_1.18.0
## [16] destiny_3.22.0 circlize_0.4.16 edgeR_4.6.3
## [19] limma_3.64.3 muscat_1.22.0 viridis_0.6.5
## [22] viridisLite_0.4.2 lubridate_1.9.4 forcats_1.0.1
## [25] stringr_1.5.2 purrr_1.1.0 readr_2.1.5
## [28] tidyr_1.3.1 tibble_3.3.0 tidyverse_2.0.0
## [31] dplyr_1.1.4 SingleCellExperiment_1.30.1 SummarizedExperiment_1.38.1
## [34] Biobase_2.68.0 GenomicRanges_1.60.0 GenomeInfoDb_1.44.3
## [37] IRanges_2.42.0 S4Vectors_0.46.0 BiocGenerics_0.54.0
## [40] generics_0.1.4 MatrixGenerics_1.20.0 matrixStats_1.5.0
## [43] pheatmap_1.0.13 ggpubr_0.6.1 ggplot2_4.0.0
## [46] Seurat_5.3.0 SeuratObject_5.2.0 sp_2.2-0
##
## loaded via a namespace (and not attached):
## [1] igraph_2.1.4 ica_1.0-3 plotly_4.11.0
## [4] Formula_1.2-5 tidyselect_1.2.1 bit_4.6.0
## [7] doParallel_1.0.17 clue_0.3-66 lattice_0.22-7
## [10] rjson_0.2.23 blob_1.2.4 S4Arrays_1.8.1
## [13] pbkrtest_0.5.5 parallel_4.5.1 png_0.1-8
## [16] plotrix_3.8-4 cli_3.6.5 ggplotify_0.1.3
## [19] goftest_1.2-3 VIM_6.2.6 variancePartition_1.38.1
## [22] textshaping_1.0.3 BiocNeighbors_2.2.0 uwot_0.2.3
## [25] curl_7.0.0 mime_0.13 evaluate_1.0.5
## [28] tidytree_0.4.6 ComplexHeatmap_2.24.1 stringi_1.8.7
## [31] backports_1.5.0 lmerTest_3.1-3 qqconf_1.3.2
## [34] httpuv_1.6.16 magrittr_2.0.4 rappdirs_0.3.3
## [37] splines_4.5.1 sctransform_0.4.2 ggbeeswarm_0.7.2
## [40] DBI_1.2.3 jquerylib_0.1.4 smoother_1.3
## [43] withr_3.0.2 corpcor_1.6.10 systemfonts_1.2.3
## [46] reformulas_0.4.1 class_7.3-23 lmtest_0.9-40
## [49] formatR_1.14 htmlwidgets_1.6.4 fs_1.6.6
## [52] ggrepel_0.9.6 labeling_0.4.3 fANCOVA_0.6-1
## [55] SparseArray_1.8.1 DESeq2_1.48.2 ranger_0.17.0
## [58] DEoptimR_1.1-4 reticulate_1.43.0 hexbin_1.28.5
## [61] zoo_1.8-14 XVector_0.48.0 knitr_1.50
## [64] ggplot.multistats_1.0.1 UCSC.utils_1.4.0 RhpcBLASctl_0.23-42
## [67] timechange_0.3.0 foreach_1.5.2 dittoSeq_1.20.0
## [70] patchwork_1.3.2 caTools_1.18.3 data.table_1.17.8
## [73] ggtree_3.16.3 R.oo_1.27.1 RSpectra_0.16-2
## [76] irlba_2.3.5.1 fastDummies_1.7.5 gridGraphics_0.5-1
## [79] lazyeval_0.2.2 yaml_2.3.10 survival_3.8-3
## [82] scattermore_1.2 crayon_1.5.3 RcppAnnoy_0.0.22
## [85] RColorBrewer_1.1-3 progressr_0.16.0 later_1.4.4
## [88] ggridges_0.5.7 codetools_0.2-20 GlobalOptions_0.1.2
## [91] aod_1.3.3 KEGGREST_1.48.1 Rtsne_0.17
## [94] shape_1.4.6.1 pkgconfig_2.0.3 TMB_1.9.17
## [97] spatstat.univar_3.1-4 mathjaxr_1.8-0 EnvStats_3.1.0
## [100] aplot_0.2.9 scatterplot3d_0.3-44 spatstat.sparse_3.1-0
## [103] ape_5.8-1 xtable_1.8-4 car_3.1-3
## [106] plyr_1.8.9 httr_1.4.7 rbibutils_2.3
## [109] tools_4.5.1 globals_0.18.0 beeswarm_0.4.0
## [112] broom_1.0.10 nlme_3.1-168 lambda.r_1.2.4
## [115] assertthat_0.2.1 lme4_1.1-37 digest_0.6.37
## [118] numDeriv_2016.8-1.1 Matrix_1.7-4 farver_2.1.2
## [121] tzdb_0.5.0 remaCor_0.0.20 reshape2_1.4.4
## [124] yulab.utils_0.2.1 glue_1.8.0 cachem_1.1.0
## [127] polyclip_1.10-7 Biostrings_2.76.0 mvtnorm_1.3-3
## [130] parallelly_1.45.1 mnormt_2.1.1 statmod_1.5.0
## [133] ragg_1.5.0 RcppHNSW_0.6.0 ScaledMatrix_1.16.0
## [136] carData_3.0-5 minqa_1.2.8 pbapply_1.7-4
## [139] vroom_1.6.6 spam_2.11-1 gson_0.1.0
## [142] gtools_3.9.5 ggsignif_0.6.4 RcppEigen_0.3.4.0.2
## [145] shiny_1.11.1 GenomeInfoDbData_1.2.14 glmmTMB_1.1.12
## [148] R.utils_2.13.0 memoise_2.0.1 rmarkdown_2.30
## [151] scales_1.4.0 R.methodsS3_1.8.2 future_1.67.0
## [154] RANN_2.6.2 spatstat.data_3.1-8 rstudioapi_0.17.1
## [157] cluster_2.1.8.1 mutoss_0.1-13 spatstat.utils_3.2-0
## [160] hms_1.1.3 fitdistrplus_1.2-4 cowplot_1.2.0
## [163] colorspace_2.1-2 rlang_1.1.6 xts_0.14.1
## [166] dotCall64_1.2 ggtangle_0.0.7 laeken_0.5.3
## [169] mgcv_1.9-3 xfun_0.53 e1071_1.7-16
## [172] TH.data_1.1-4 iterators_1.0.14 abind_1.4-8
## [175] GOSemSim_2.34.0 treeio_1.32.0 futile.options_1.0.1
## [178] bitops_1.0-9 Rdpack_2.6.4 promises_1.3.3
## [181] RSQLite_2.4.3 qvalue_2.40.0 sandwich_3.1-1
## [184] fgsea_1.34.2 DelayedArray_0.34.1 proxy_0.4-27
## [187] GO.db_3.21.0 compiler_4.5.1 prettyunits_1.2.0
## [190] boot_1.3-32 beachmat_2.24.0 listenv_0.9.1
## [193] Rcpp_1.1.0 BiocSingular_1.24.0 tensor_1.5.1
## [196] MASS_7.3-65 progress_1.2.3 BiocParallel_1.42.2
## [199] babelgene_22.9 spatstat.random_3.4-2 R6_2.6.1
## [202] fastmap_1.2.0 multcomp_1.4-28 fastmatch_1.1-6
## [205] rstatix_0.7.2 vipor_0.4.7 TTR_0.24.4
## [208] ROCR_1.0-11 TFisher_0.2.0 rsvd_1.0.5
## [211] vcd_1.4-13 nnet_7.3-20 gtable_0.3.6
## [214] KernSmooth_2.23-26 miniUI_0.1.2 deldir_2.0-4
## [217] htmltools_0.5.8.1 ggthemes_5.1.0 bit64_4.6.0-1
## [220] spatstat.explore_3.5-3 lifecycle_1.0.4 blme_1.0-6
## [223] S7_0.2.0 nloptr_2.2.1 sass_0.4.10
## [226] vctrs_0.6.5 robustbase_0.99-6 spatstat.geom_3.6-0
## [229] sn_2.1.1 ggfun_0.2.0 future.apply_1.20.0
## [232] bslib_0.9.0 pillar_1.11.1 gplots_3.2.0
## [235] pcaMethods_2.0.0 locfit_1.5-9.12 jsonlite_2.0.0
## [238] GetoptLong_1.0.5